linear_threshold <- 46
# Load data for handcounts
HC <- suppressWarnings(
read_excel("../../Data/Processed/hd_hand_counted.xlsx") %>%
dplyr::select(camera_id, handcount)
)
# Load list of bad images that were handcounted
bad_images <- read_excel("../../Data/Processed/bad_images.xlsx") %>%
filter(status == "bad") %>%
rename(camera_id = cameraid)
# Drop bad images
HC <- HC %>%
filter(!(camera_id %in% bad_images$camera_id))
linear_area <- read_csv("../../Data/Processed/area_summation_HC_linear_fine.csv",
col_types = "cii") %>%
filter(lower_thresh == linear_threshold) %>%
dplyr::select(-lower_thresh) %>%
rename(area_linear = area)
# Join handcounts with areas
HC <- inner_join(HC, linear_area) %>%
as.data.frame()
## Joining, by = "camera_id"
Check these areas: should be in the test_case batch but aren’t.
HC %>%
filter(is.na(area_linear))
## [1] camera_id handcount area_linear
## <0 rows> (or 0-length row.names)
plot_jpeg = function(path, add=FALSE) {
# https://stackoverflow.com/a/28729601/168137
require('jpeg')
jpg = readJPEG(path, native = TRUE) # read the file
res = dim(jpg)[2:1] # get the resolution, [x, y]
if (!add) # initialize an empty plot area if add==FALSE
plot(1, 1,
xlim = c(1, res[1]), ylim = c(1, res[2]),
asp = 1, type = 'n', xaxs = 'i',
yaxs = 'i', xaxt = 'n', yaxt = 'n',
xlab = '', ylab = '', bty = 'n', main = path)
rasterImage(jpg, 1, 1, res[1], res[2])
}
flagged_images <- HC %>%
filter(area_linear > 2.5e05)
basedir <- "../../../h2_fec_images/h2_thresh_images/"
if (nrow(flagged_images) >= 1) {
for (ii in 1:nrow(flagged_images)) {
print(plot_jpeg(paste0(basedir, flagged_images$camera_id[ii])))
}
}
Linear model predicting egg count from area:
fm_lm <- lm(handcount ~ area_linear - 1, data = HC)
new_data <- tibble(
area_linear = seq(0, 1.25 * max(c(HC$area_linear)),
length.out = 200))
new_data$lm_pred <- predict(fm_lm, newdata = new_data)
ggplot() +
geom_line(data = new_data, aes(x = area_linear, y = lm_pred)) +
geom_point(data = HC, aes(x = area_linear, y = handcount))
HC$lm_pred <- predict(fm_lm)
cor(HC$handcount, HC$lm_pred)
## [1] 0.8127161
suppressWarnings(
M <- read_excel("../../Data/Processed/feclife_with-image-ids.xlsx")
)
# Load areas
h2_fec_areas <- read_csv("../../Data/Processed/area_summation_linear_h2_fecimages.csv",
col_types = "cii")
M <- left_join(M, h2_fec_areas) %>%
rename(area_linear = area) %>%
drop_na(area_linear)
## Joining, by = "camera_id"
M$predicted_count_linear <- predict(fm_lm, newdata = M)
Check these. They are handcounted but in the set of analyzed images.
M %>%
filter(is.na(test_case)) %>%
filter(!is.na(handcount)) %>%
filter(handcount != "unassessable") %>%
filter(handcount != "unassigned") %>%
mutate(handcount = as.numeric(handcount)) %>%
select(camera_id, handcount, predicted_count_linear) %>%
filter(!is.na(predicted_count_linear))
## # A tibble: 8 x 3
## camera_id handcount predicted_count_linear
## <chr> <dbl> <dbl>
## 1 IMG_3720.JPG 956 1325
## 2 IMG_3780.JPG 900 1456
## 3 IMG_5041.JPG 3.00 23.3
## 4 IMG_5701.JPG 8.00 36.6
## 5 IMG_5706.JPG 0 39.3
## 6 IMG_5707.JPG 0 88.2
## 7 IMG_5730.JPG 7.00 120
## 8 IMG_5731.JPG 26.0 108
ggplot(M, aes(predicted_count_linear)) +
geom_histogram(bins = 30)
Check high and low predicted eggs
M %>%
select(camera_id, predicted_count_linear) %>%
arrange(desc(predicted_count_linear))
## # A tibble: 3,437 x 2
## camera_id predicted_count_linear
## <chr> <dbl>
## 1 IMG_3756.JPG 3265
## 2 IMG_2841.JPG 3013
## 3 IMG_2198.JPG 2933
## 4 IMG_2807.JPG 2915
## 5 IMG_2811.JPG 2756
## 6 IMG_2791.JPG 2755
## 7 IMG_2813.JPG 2748
## 8 IMG_2781.JPG 2723
## 9 IMG_1880.JPG 2714
## 10 IMG_2810.JPG 2671
## # ... with 3,427 more rows
M %>%
select(camera_id, predicted_count_linear) %>%
arrange(predicted_count_linear)
## # A tibble: 3,437 x 2
## camera_id predicted_count_linear
## <chr> <dbl>
## 1 IMG_2898.JPG 0.129
## 2 IMG_4209.JPG 0.198
## 3 IMG_5615.JPG 0.555
## 4 IMG_5379.JPG 0.631
## 5 IMG_4198.JPG 0.761
## 6 IMG_5610.JPG 0.806
## 7 IMG_5388.JPG 1.10
## 8 IMG_5397.JPG 1.10
## 9 IMG_5362.JPG 1.21
## 10 IMG_5519.JPG 1.41
## # ... with 3,427 more rows
M.low <- M %>%
select(camera_id, predicted_count_linear) %>%
arrange(predicted_count_linear)
M.low <- M.low[1:300,]
ee <- read_excel("../../Data/Processed/egg_notes_EGK.xlsx")
ee$Image <- paste("IMG_", ee$Image, ".JPG", sep = "")
ww <- which(M.low$camera_id %in% ee$Image)
M.low <- M.low[-ww,]
set.seed <- 8462
set.zeros <- M.low[sample(seq(1, nrow(M.low)), 10) , ]
plot_jpeg = function(path, add=FALSE) {
# https://stackoverflow.com/a/28729601/168137
require('jpeg')
jpg = readJPEG(path, native = TRUE) # read the file
res = dim(jpg)[2:1] # get the resolution, [x, y]
if (!add) # initialize an empty plot area if add==FALSE
plot(1, 1,
xlim = c(1, res[1]), ylim = c(1, res[2]),
asp = 1, type = 'n', xaxs = 'i',
yaxs = 'i', xaxt = 'n', yaxt = 'n',
xlab = '', ylab = '', bty = 'n', main = path)
rasterImage(jpg, 1, 1, res[1], res[2])
}
flagged_images <- M %>%
filter(predicted_count_linear > 2000)
basedir <- "../../../h2_fec_images/h2_thresh_images/"
if (nrow(flagged_images) >= 1) {
for (ii in 1:nrow(flagged_images)) {
print(plot_jpeg(paste0(basedir, flagged_images$camera_id[ii])))
}
}
## Loading required package: jpeg
## NULL
## NULL
## NULL
## NULL
## NULL
## NULL
## NULL
## NULL
## NULL
## NULL
## NULL
## NULL
## NULL
## NULL
## NULL
## NULL
## NULL
## NULL
## NULL
## NULL
## NULL
## NULL
## NULL
## NULL
## NULL
## NULL
## NULL
## NULL
## NULL
## NULL
## NULL
## NULL
## NULL
## NULL
## NULL
## NULL
## NULL
## NULL
## NULL
## NULL
## NULL
## NULL
## NULL
## NULL
## NULL
## NULL
## NULL
## NULL
## NULL
## NULL
## NULL
## NULL
## NULL
## NULL
## NULL
## NULL
## NULL
## NULL
## NULL
## NULL
## NULL
## NULL
## NULL
## NULL
## NULL
## NULL
## NULL
## NULL
## NULL
## NULL
## NULL
## NULL
## NULL
## NULL
## NULL
## NULL
## NULL
## NULL
## NULL
## NULL